{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test SimpleCache File Operations\n",
    "\n",
    "This notebook tests how AutoCoderRAGAsyncUpdateQueue handles file addition, deletion, and update."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created test directory: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import tempfile\n",
    "import shutil\n",
    "import time\n",
    "from autocoder.rag.variable_holder import VariableHolder\n",
    "from tokenizers import Tokenizer\n",
    "\n",
    "# The tokenizer file is machine-specific: set AUTOCODER_TOKENIZER_PATH to override it.\n",
    "# The previously hardcoded location is kept as the fallback so existing setups still work.\n",
    "VariableHolder.TOKENIZER_PATH = os.environ.get(\n",
    "    \"AUTOCODER_TOKENIZER_PATH\", \"/Users/allwefantasy/Downloads/tokenizer.json\"\n",
    ")\n",
    "VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(VariableHolder.TOKENIZER_PATH)\n",
    "\n",
    "\n",
    "from autocoder.rag.cache.simple_cache import AutoCoderRAGAsyncUpdateQueue\n",
    "\n",
    "\n",
    "# Create a temporary directory for testing; the cells below create and\n",
    "# delete files inside it, and the final cleanup cell removes it.\n",
    "test_dir = tempfile.mkdtemp()\n",
    "print(f\"Created test directory: {test_dir}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-12-17 14:43:13.512\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mprocess_queue\u001b[0m:\u001b[36m134\u001b[0m - \u001b[1m/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt is detected to be removed\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:14.746\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mprocess_queue\u001b[0m:\u001b[36m138\u001b[0m - \u001b[1m/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py is detected to be updated\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:14.748\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.utils\u001b[0m:\u001b[36mprocess_file_local\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mLoad file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py in 0.0012199878692626953\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Initialize the cache manager, pointed at the temporary test directory.\n",
    "# NOTE(review): required_exts presumably limits which file extensions are\n",
    "# tracked (.txt and .py here) - confirm against simple_cache.\n",
    "cache_manager = AutoCoderRAGAsyncUpdateQueue(\n",
    "    path=test_dir,\n",
    "    ignore_spec=None,\n",
    "    required_exts=[\".txt\", \".py\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-12-17 14:43:06.436 | INFO     | autocoder.rag.utils:process_file_in_multi_process:66 - Load file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt in 0.0004999637603759766\n",
      "\u001b[32m2024-12-17 14:43:06.520\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m106\u001b[0m - \u001b[1m检查文件是否有更新.....\u001b[0m\n",
      "\u001b[32m2024-12-17 14:43:06.522\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m119\u001b[0m - \u001b[1mfiles_to_process: []\u001b[0m\n",
      "\u001b[32m2024-12-17 14:43:06.523\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mdeleted_files: set()\u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cache after adding file:\n",
      "File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt\n",
      "Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt', 'source_code': 'This is a test file', 'tag': '', 'tokens': 13, 'metadata': {}}]\n"
     ]
    }
   ],
   "source": [
    "# Test file addition\n",
    "def test_file_addition():\n",
    "    \"\"\"Write test1.txt into the test directory, wait, then print every cache entry.\"\"\"\n",
    "    new_file_path = os.path.join(test_dir, \"test1.txt\")\n",
    "    with open(new_file_path, \"w\") as handle:\n",
    "        handle.write(\"This is a test file\")\n",
    "\n",
    "    # Pause before reading the cache back.\n",
    "    time.sleep(2)\n",
    "\n",
    "    # Fetch the cache first, then print the header and one File/Content pair per entry.\n",
    "    snapshot = cache_manager.get_cache()\n",
    "    print(\"\\nCache after adding file:\")\n",
    "    for cached_path, entry in snapshot.items():\n",
    "        print(f\"File: {cached_path}\")\n",
    "        print(f\"Content: {entry['content']}\")\n",
    "\n",
    "test_file_addition()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 17\u001b[0m\n\u001b[1;32m     14\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mCache after deleting file:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFiles in cache: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(cache\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 17\u001b[0m \u001b[43mtest_file_deletion\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[6], line 5\u001b[0m, in \u001b[0;36mtest_file_deletion\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtest_file_deletion\u001b[39m():\n\u001b[1;32m      3\u001b[0m     \u001b[38;5;66;03m# Delete the test file\u001b[39;00m\n\u001b[1;32m      4\u001b[0m     test_file \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(test_dir, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest1.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m     \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mremove\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m     \u001b[38;5;66;03m# trigger update\u001b[39;00m\n\u001b[1;32m      7\u001b[0m     cache \u001b[38;5;241m=\u001b[39m cache_manager\u001b[38;5;241m.\u001b[39mget_cache()\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test1.txt'"
     ]
    }
   ],
   "source": [
    "# Test file deletion\n",
    "def test_file_deletion():\n",
    "    \"\"\"Delete test1.txt from the test directory and print the cache keys afterwards.\n",
    "\n",
    "    Safe to re-run: when the file is already gone (for example because this\n",
    "    cell was executed before), the removal is skipped instead of raising\n",
    "    FileNotFoundError.\n",
    "    \"\"\"\n",
    "    test_file = os.path.join(test_dir, \"test1.txt\")\n",
    "    try:\n",
    "        os.remove(test_file)\n",
    "    except FileNotFoundError:\n",
    "        print(f\"{test_file} is already absent; skipping removal\")\n",
    "\n",
    "    # Per the original note this first read is meant to trigger an update\n",
    "    # check; its return value is intentionally discarded.\n",
    "    cache_manager.get_cache()\n",
    "\n",
    "    # Wait for the cache to update\n",
    "    time.sleep(4)\n",
    "\n",
    "    # Read the cache again and list the files it still holds\n",
    "    cache = cache_manager.get_cache()\n",
    "    print(\"\\nCache after deleting file:\")\n",
    "    print(f\"Files in cache: {list(cache.keys())}\")\n",
    "\n",
    "test_file_deletion()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cache after initial file creation:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-12-17 14:44:11.507 | INFO     | autocoder.rag.utils:process_file_in_multi_process:66 - Load file /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py in 0.0006647109985351562\n",
      "\u001b[32m2024-12-17 14:44:11.574\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m106\u001b[0m - \u001b[1m检查文件是否有更新.....\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:11.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m119\u001b[0m - \u001b[1mfiles_to_process: []\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:11.577\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mdeleted_files: set()\u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py\n",
      "Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'source_code': \"def hello():\\n    print('Hello')\", 'tag': '', 'tokens': 17, 'metadata': {}}]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-12-17 14:44:14.586\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m106\u001b[0m - \u001b[1m检查文件是否有更新.....\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:14.588\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m119\u001b[0m - \u001b[1mfiles_to_process: [('/var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'test2.py', 1734417852.5824633, 'f2bb1a30c5b271b44c14ce8d7456ac0a')]\u001b[0m\n",
      "\u001b[32m2024-12-17 14:44:14.589\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mautocoder.rag.cache.simple_cache\u001b[0m:\u001b[36mtrigger_update\u001b[0m:\u001b[36m120\u001b[0m - \u001b[1mdeleted_files: set()\u001b[0m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cache after file update:\n",
      "File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py\n",
      "Content: [{'module_name': '##File: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmpxy_b65fc/test2.py', 'source_code': \"def hello():\\n    print('Hello')\", 'tag': '', 'tokens': 17, 'metadata': {}}]\n"
     ]
    }
   ],
   "source": [
    "# Test file update\n",
    "def test_file_update():\n",
    "    \"\"\"Create test2.py, print the cache, overwrite the file, then print the cache again.\"\"\"\n",
    "    target = os.path.join(test_dir, \"test2.py\")\n",
    "\n",
    "    def write_source(text):\n",
    "        # (Re)write the whole file with the given text.\n",
    "        with open(target, \"w\") as handle:\n",
    "            handle.write(text)\n",
    "\n",
    "    def report(header):\n",
    "        # Print the header first, then fetch the cache and list each entry.\n",
    "        print(header)\n",
    "        for cached_path, entry in cache_manager.get_cache().items():\n",
    "            print(f\"File: {cached_path}\")\n",
    "            print(f\"Content: {entry['content']}\")\n",
    "\n",
    "    # Initial version of the file, followed by a pause before the first read.\n",
    "    write_source(\"def hello():\\n    print('Hello')\")\n",
    "    time.sleep(2)\n",
    "    report(\"\\nCache after initial file creation:\")\n",
    "\n",
    "    # Pause so the modification time differs, then overwrite the file.\n",
    "    time.sleep(1)\n",
    "    write_source(\"def hello():\\n    print('Hello World')\")\n",
    "\n",
    "    # Pause again before the second read.\n",
    "    time.sleep(2)\n",
    "    report(\"\\nCache after file update:\")\n",
    "\n",
    "test_file_update()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cleaned up test directory: /var/folders/dm/0xljd5nn10b7bwwmwv8w5v100000gn/T/tmps40ibmpk\n"
     ]
    }
   ],
   "source": [
    "# Cleanup\n",
    "def cleanup():\n",
    "    \"\"\"Stop the cache manager and delete the temporary test directory.\n",
    "\n",
    "    The directory is removed in a ``finally`` block so it is not left behind\n",
    "    on disk when ``cache_manager.stop()`` raises.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        cache_manager.stop()\n",
    "    finally:\n",
    "        shutil.rmtree(test_dir)\n",
    "    print(f\"\\nCleaned up test directory: {test_dir}\")\n",
    "\n",
    "cleanup()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
